#---- Load libraries and functions -----


library(tidyverse)
library(ggplot2)
library(scales)
library(gridExtra)
library(grid)
library(jmv)
library(psych)
library(dplyr)
library(tidyr)
library(readxl)
library(sjPlot)
library(ggridges)
library(data.table)
library(RColorBrewer)


# Read the coding sheet. Per the counts further down, each row is one survey
# use and several rows can share a study_id.
# data.frame() is applied to the imported tibble exactly as before, so column
# names are sanitised the same way.
data <- data.frame(read_excel("journal_coding_data.xlsx"))


# Create aggregate data: one row per study_id.

# setDT() converts `data` to a data.table by reference; it stays a data.table
# for the rest of the script.
setDT(data)

# mean() is applied to the study descriptors and max() to the content codes.
# NOTE(review): the means presumably assume coder/journal/field/year/
# analysis_type are constant within a study - confirm.
# Columns are named directly here instead of being renamed afterwards.
data_aggr <- data[, .(
  coder         = mean(coder),
  journal       = mean(journal),
  field         = mean(field),
  year          = mean(year),
  analysis_type = mean(analysis_type),
  desc          = max(desc),
  explore       = max(explore),
  survexp       = max(survexp),
  fieldexp      = max(fieldexp),
  subst         = max(subst),
  method        = max(method),
  cross         = max(cross),
  long          = max(long)
), by = study_id]

# Downstream dplyr code expects a plain data.frame; setDF() converts by
# reference and also drops the data.table-specific attributes.
setDF(data_aggr)

# View() needs an interactive session; skip it when the script is run in batch.
if (interactive()) {
  View(data_aggr)
}

# Number of articles and of survey uses

# Articles: data_aggr holds one row per study_id.
data_aggr %>%
  tally()

# Articles per field.
data_aggr %>%
  group_by(field) %>%
  tally()

# Survey uses: one row of `data` each.
data %>%
  tally()

# Survey uses per field.
data %>%
  group_by(field) %>%
  tally()



# Labeller translating the numeric field code into the discipline name.
labels <- as_labeller(setNames(
  c("Political Science", "Sociology", "Public opinion"),
  as.character(1:3)
))

# Labeller translating the rr_type code into its response-rate reporting category.
labels_rr <- as_labeller(setNames(
  c(
    "No information",
    "Partial information – other rates",
    "Given, not defined",
    "Given, defined, but not AAPOR standard",
    "Given, AAPOR standard used"
  ),
  as.character(1:5)
))

# Missing-data flags: 1L where the element is NA (not reported), 0L otherwise.
# is.na() is already vectorised, so no row-wise apply() is needed; this also
# works whether `data` is a data.table or a plain data.frame.
data$year_survey_na <- as.integer(is.na(data[["year_survey"]]))
data$location_na    <- as.integer(is.na(data[["location"]]))
data$target_na      <- as.integer(is.na(data[["target"]]))
data$sample_size_na <- as.integer(is.na(data[["sample_size"]]))
data$mode_na        <- as.integer(is.na(data[["mode"]]))


##index

# Transparency index: each survey use earns one point for every reported
# element out of ten, so the score ranges from 0 to 10. The scoring is defined
# once here and reused by every breakdown below.

# Append `rr_bin`, `wording_bin` and the summed `trans_index` to `df`.
# `df` must contain rr_type, wording, frame, sampling_design, name_company and
# the *_na flags created above. The terms are added in the same order as the
# original step-by-step version.
add_trans_index <- function(df) {
  df %>%
    mutate(
      rr_bin = recode(rr_type, "1" = "0", "2" = "0", "3" = "1", "4" = "1", "5" = "1"),
      wording_bin = recode(wording, "3" = "0", "2" = "1", "1" = "1"),
      trans_index =
        if_else(year_survey_na == 1, 0, 1) +
        if_else(location_na == 1, 0, 1) +
        if_else(target_na == 1, 0, 1) +
        if_else(sample_size_na == 1, 0, 1) +
        if_else(frame == 1, 1, 0) +
        if_else(sampling_design == 1, 1, 0) +
        if_else(mode_na == 1, 0, 1) +
        if_else(rr_bin == 1, 1, 0) +
        if_else(name_company == 1, 1, 0) +
        if_else(wording_bin == 1, 1, 0)
    )
}

# Shared look of the index figures: minimal theme without vertical grid lines,
# centred title, Times New Roman at `text_size`; optionally without a legend.
index_theme <- function(text_size, hide_legend = TRUE) {
  look <- theme_minimal() +
    theme(
      panel.border = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.grid.minor.x = element_blank(),
      plot.title = element_text(hjust = 0.5),
      text = element_text(size = text_size, family = "Times New Roman")
    )
  if (hide_legend) {
    look <- look + theme(legend.position = "none")
  }
  look
}

# Mean index (+/- 1 SD error bar) per level of a categorical `group` column.
# `df` is a summary with columns `mean` and `sd`; `group_labels` relabel the
# x axis and `group_colours` colour the levels, both in level order.
plot_index_by_group <- function(df, group, group_labels, group_colours) {
  ggplot(df, aes(x = as.factor({{ group }}), y = mean,
                 col = as.factor({{ group }}))) +
    geom_line() +
    geom_point() +
    scale_x_discrete(labels = group_labels) +
    scale_color_manual(values = group_colours) +
    labs(title = "", x = NULL, y = "", colour = NULL) +
    geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2,
                  position = position_dodge(0.05)) +
    ylim(4, 10.5) +
    index_theme(12)
}

# Score every row with wording < 99 once; all summaries below start from here.
trans_rows <- subset(data, wording < 99) %>%
  add_trans_index()

#total

transindex <- trans_rows %>%
  summarize(mean = mean(trans_index), sd = sd(trans_index))
transindex

# per publication year
transindex <- trans_rows %>%
  group_by(year) %>%
  summarize(mean = mean(trans_index), sd = sd(trans_index))
transindex

p <- ggplot(transindex, aes(x = as.factor(year), y = mean, group = 1)) +
  geom_line() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2,
                position = position_dodge(0.05)) +
  geom_point() +
  labs(title = "", x = NULL, y = "Transparency index score", colour = NULL) +
  ylim(4, 10.5) +
  index_theme(16, hide_legend = FALSE)
p
ggsave("trans_time.png", p)

#per field

transindex <- trans_rows %>%
  group_by(field) %>%
  summarize(mean = mean(trans_index), sd = sd(trans_index))
transindex

# per year within field; the result stays grouped by year, as before
transindex <- trans_rows %>%
  group_by(year, field) %>%
  summarize(mean = mean(trans_index), sd = sd(trans_index),
            .groups = "drop_last")
transindex

p <- ggplot(transindex, aes(x = as.factor(year), y = mean,
                            col = as.factor(field), group = 1)) +
  facet_wrap(~field, labeller = labels) +
  geom_line() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = .2,
                position = position_dodge(0.05)) +
  geom_point() +
  scale_x_discrete(breaks = c("2011", "2021")) +
  scale_color_manual(values = c("grey35", "grey50", "grey75")) +
  labs(title = "", x = NULL, y = "Transparency index score", colour = NULL) +
  ylim(4, 10.5) +
  index_theme(18)
p
ggsave("trans_time_field.png", p, height = 8)

#per prob

# Rows without a probability code are left out of this breakdown only.
transindex <- trans_rows %>%
  filter(!is.na(probability)) %>%
  group_by(probability) %>%
  summarize(mean = mean(trans_index), sd = sd(trans_index))
transindex

p <- plot_index_by_group(
  transindex, probability,
  group_labels = c("Probability-based", "Non-probability-based"),
  group_colours = c("grey45", "grey85")
)
p

#per mode

# Rows without a mode code are left out of this breakdown only.
transindex <- trans_rows %>%
  filter(!is.na(mode)) %>%
  group_by(mode) %>%
  summarize(mean = mean(trans_index), sd = sd(trans_index))
transindex

p <- plot_index_by_group(
  transindex, mode,
  group_labels = c("Paper", "F2F", "Phone", "Web", "Mixed-mode"),
  group_colours = c("grey25", "grey45", "grey55", "grey65", "grey85")
)
p

#per focus

transindex <- trans_rows %>%
  group_by(focus) %>%
  summarize(mean = mean(trans_index), sd = sd(trans_index))
transindex

p <- plot_index_by_group(
  transindex, focus,
  group_labels = c("Substantive", "Method"),
  group_colours = c("grey45", "grey85")
)
p


#details

# Overall share of survey uses that report each design element.

# One-row summary for a single element.
#   df         rows to summarise
#   flag       indicator column; the row with flag == 1 is kept
#   var_label  display name stored in `var`
#   complement TRUE when flag == 1 marks a MISSING element (the *_na flags):
#              the reported share is then 1 - n / total. FALSE when flag == 1
#              marks a reported element: the share is n / total.
# The denominator is all rows of `df`, whatever value `flag` takes.
# NOTE(review): if no row has flag == 1 the result has zero rows and the
# element drops out of the chart - same as the previous per-element pipelines.
overall_share <- function(df, flag, var_label, complement = FALSE) {
  df %>%
    group_by({{ flag }}) %>%
    summarize(n = n()) %>%
    mutate(
      pct = if (complement) 1 - (n / sum(n)) else n / sum(n),
      lbl = scales::percent(pct, accuracy = 0.1),
      var = var_label
    ) %>%
    filter({{ flag }} == 1)
}

year_survey <- overall_share(data, year_survey_na, "Year of data collection",
                             complement = TRUE)
year_survey

location <- overall_share(data, location_na, "Location", complement = TRUE)
location

target <- overall_share(data, target_na, "Target population", complement = TRUE)
target

sample_size <- overall_share(data, sample_size_na, "Sample size",
                             complement = TRUE)
sample_size

frame <- overall_share(data, frame, "Sampling frame")
frame

design <- overall_share(data, sampling_design, "Sampling design")
design

mode <- overall_share(data, mode_na, "Mode", complement = TRUE)
mode

# rr_type 3-5 (a response rate is given) count as reported, 1-2 as not.
rr <- data %>%
  mutate(rr_bin = recode(rr_type, "1" = "0", "2" = "0", "3" = "1", "4" = "1", "5" = "1")) %>%
  overall_share(rr_bin, "Response rate")
rr

name <- overall_share(data, name_company, "Name of field company")
name

# wording 1-2 count as reported, 3 as not; code 99 is excluded beforehand.
wording <- subset(data, wording < 99) %>%
  mutate(wording_bin = recode(wording, "3" = "0", "2" = "1", "1" = "1")) %>%
  overall_share(wording_bin, "Wording of key questions")
wording

# Stack the ten one-row summaries. The earlier chain of natural full_join()s
# never matched any row (each table has its own `var`), so it was a row bind.
joint <- bind_rows(year_survey, location, target, sample_size, frame,
                   design, mode, rr, name, wording)

joint

# Fix the display order of the elements.
joint$var <- factor(
  joint$var,
  levels = c("Year of data collection", "Location", "Target population",
             "Sample size", "Sampling frame", "Sampling design",
             "Mode", "Response rate", "Name of field company",
             "Wording of key questions")
)

# Horizontal bar chart; reorder(..., desc) reverses the level order so the
# first element ends up at the top after coord_flip().
p <- ggplot(joint, aes(x = reorder(var, var, desc), y = pct)) +
  geom_col(position = "identity") +
  geom_text(aes(label = percent(pct, accuracy = 0.1)),
            position = position_dodge(width = 1),
            hjust = -0.1, family = "Times New Roman", size = 3) +
  scale_y_continuous(labels = percent_format(accuracy = 1), expand = c(0, 0.1)) +
  labs(x = NULL, y = NULL) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12, family = "Times New Roman"),
    axis.text.x = element_text(size = 8, family = "Times New Roman")
  ) +
  coord_flip()
p
ggsave("detail_total.png", p)

##per year

# Share of survey uses reporting each design element, by publication year.

# Yearly summary for a single element: one row per year with flag == 1.
#   df         rows to summarise (needs a `year` column)
#   flag       indicator column
#   var_label  display name stored in `var`
#   complement TRUE when flag == 1 marks a MISSING element (the *_na flags),
#              so the reported share is 1 - n / total; FALSE when flag == 1
#              marks a reported element, so the share is n / total.
# Years in which no row has flag == 1 are filled in explicitly with n = 0:
# that means 100% reported for complement flags and 0% for positive flags.
# Previously only some elements were completed, and the positive `frame` flag
# was filled with 100%. Labels are built after the fill, so filled rows are
# labelled like all others. `lbl2` keeps the label for the first and last year
# only (2012-2020 are blanked).
yearly_share <- function(df, flag, var_label, complement = FALSE) {
  share_when_absent <- if (complement) 1 else 0
  df %>%
    group_by(year, {{ flag }}) %>%
    summarize(n = n(), .groups = "drop_last") %>%
    mutate(pct = if (complement) 1 - (n / sum(n)) else n / sum(n)) %>%
    ungroup() %>%
    complete(year, {{ flag }},
             fill = list(n = 0L, pct = share_when_absent)) %>%
    filter({{ flag }} == 1) %>%
    mutate(
      lbl = scales::percent(pct, accuracy = 0.1),
      lbl2 = if_else(year %in% 2012:2020, NA_character_, lbl),
      var = var_label
    )
}

year_survey <- yearly_share(data, year_survey_na, "Year of data collection",
                            complement = TRUE)
year_survey

location <- yearly_share(data, location_na, "Location", complement = TRUE)
location

target <- yearly_share(data, target_na, "Target population", complement = TRUE)
target

sample_size <- yearly_share(data, sample_size_na, "Sample size",
                            complement = TRUE)
sample_size

frame <- yearly_share(data, frame, "Sampling frame")
frame

design <- yearly_share(data, sampling_design, "Sampling design")
design

mode <- yearly_share(data, mode_na, "Mode", complement = TRUE)
mode

# rr_type 3-5 (a response rate is given) count as reported, 1-2 as not.
rr <- data %>%
  mutate(rr_bin = recode(rr_type, "1" = "0", "2" = "0", "3" = "1", "4" = "1", "5" = "1")) %>%
  yearly_share(rr_bin, "Response rate")
rr

name <- yearly_share(data, name_company, "Name of field company")
name

# wording 1-2 count as reported, 3 as not; code 99 is excluded beforehand.
wording <- subset(data, wording < 99) %>%
  mutate(wording_bin = recode(wording, "3" = "0", "2" = "1", "1" = "1")) %>%
  yearly_share(wording_bin, "Wording of key questions")
wording

# Stack the per-element tables (the tables share no rows, so this is a row
# bind rather than a join).
joint <- bind_rows(year_survey, location, target, sample_size, frame,
                   design, mode, rr, name, wording)

# View() only works interactively; print the table in batch runs instead.
if (interactive()) View(joint) else print(joint)

# Fix the facet order of the elements.
joint$var <- factor(
  joint$var,
  levels = c("Year of data collection", "Location", "Target population",
             "Sample size", "Sampling frame", "Sampling design",
             "Mode", "Response rate", "Name of field company",
             "Wording of key questions")
)

# One facet per element; only the first and last year carry a value label.
p <- ggplot(joint, aes(x = year, y = pct, group = var, label = lbl2)) +
  geom_line() +
  facet_wrap(~var, nrow = 4) +
  geom_text(aes(label = lbl2), vjust = -0.5, size = 4,
            family = "Times New Roman") +
  scale_y_continuous(labels = percent, limits = c(0, 1.1),
                     breaks = c(0, 0.25, 0.5, 0.75, 1)) +
  scale_x_continuous(labels = c("2011", "2013", "2015", "2017", "2019", "2021"),
                     limits = c(2011, 2021),
                     breaks = c(2011, 2013, 2015, 2017, 2019, 2021),
                     expand = c(0, 1)) +
  labs(title = "", x = NULL, y = "", colour = NULL) +
  theme_minimal() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16, family = "Times New Roman")
  )
p
ggsave("detail_time.png", p, width = 10, height = 12)

##per field

# Share of survey uses reporting each design element, by field.

# Per-field summary for a single element: one row per field with flag == 1.
#   df         rows to summarise (needs a `field` column)
#   flag       indicator column
#   var_label  display name stored in `var`
#   complement TRUE when flag == 1 marks a MISSING element (the *_na flags),
#              so the reported share is 1 - n / total; FALSE when flag == 1
#              marks a reported element, so the share is n / total.
# Fields in which no row has flag == 1 are filled in explicitly with n = 0:
# 100% reported for complement flags, 0% for positive flags. Previously the
# positive `frame` and `sampling_design` flags were filled with 100%, and the
# other elements were not completed at all.
field_share <- function(df, flag, var_label, complement = FALSE) {
  share_when_absent <- if (complement) 1 else 0
  df %>%
    group_by(field, {{ flag }}) %>%
    summarize(n = n(), .groups = "drop_last") %>%
    mutate(pct = if (complement) 1 - (n / sum(n)) else n / sum(n)) %>%
    ungroup() %>%
    complete(field, {{ flag }},
             fill = list(n = 0L, pct = share_when_absent)) %>%
    filter({{ flag }} == 1) %>%
    mutate(
      lbl = scales::percent(pct, accuracy = 0.1),
      var = var_label
    )
}

year_survey <- field_share(data, year_survey_na, "Year of data collection",
                           complement = TRUE)
year_survey

location <- field_share(data, location_na, "Location", complement = TRUE)
location

target <- field_share(data, target_na, "Target population", complement = TRUE)
target

sample_size <- field_share(data, sample_size_na, "Sample size",
                           complement = TRUE)
sample_size

frame <- field_share(data, frame, "Sampling frame")
frame

design <- field_share(data, sampling_design, "Sampling design")
design

mode <- field_share(data, mode_na, "Mode", complement = TRUE)
mode

# rr_type 3-5 (a response rate is given) count as reported, 1-2 as not.
rr <- data %>%
  mutate(rr_bin = recode(rr_type, "1" = "0", "2" = "0", "3" = "1", "4" = "1", "5" = "1")) %>%
  field_share(rr_bin, "Response rate")
rr

name <- field_share(data, name_company, "Name of field company")
name

# wording 1-2 count as reported, 3 as not; code 99 is excluded beforehand.
wording <- subset(data, wording < 99) %>%
  mutate(wording_bin = recode(wording, "3" = "0", "2" = "1", "1" = "1")) %>%
  field_share(wording_bin, "Wording of key questions")
wording

# Stack the per-element tables (the tables share no rows, so this is a row
# bind rather than a join).
joint <- bind_rows(year_survey, location, target, sample_size, frame,
                   design, mode, rr, name, wording)

joint

# Fix the display order of the elements.
joint$var <- factor(
  joint$var,
  levels = c("Year of data collection", "Location", "Target population",
             "Sample size", "Sampling frame", "Sampling design",
             "Mode", "Response rate", "Name of field company",
             "Wording of key questions")
)

# Field as a factor in reverse code order; the legend is reversed again below.
joint$field <- factor(as.character(joint$field), levels = c("3", "2", "1"))

# Grouped horizontal bars, one bar per field within each element. The text
# labels use the same dodge width as the bars so each label sits on its bar.
p <- ggplot(joint, aes(x = var, y = pct, fill = field)) +
  geom_bar(position = position_dodge(0.8), stat = "identity", width = 0.8) +
  geom_text(aes(label = percent(pct, accuracy = 0.1)),
            position = position_dodge(width = 0.8),
            hjust = -0.1, family = "Times New Roman", size = 4) +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1),
                     labels = percent_format(accuracy = 1),
                     expand = c(0, 0.2)) +
  scale_fill_manual(values = c("grey85", "grey60", "grey45"), name = "",
                    labels = labels) +
  scale_x_discrete(limits = rev) +
  labs(x = NULL, y = NULL) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(size = 10),
    text = element_text(size = 16, family = "Times New Roman")
  ) +
  coord_flip()

p
ggsave("fre_field.png", p)


##RR

# Overall distribution of the response-rate reporting categories (rr_type):
# count, share and formatted share per category.
data %>%
  group_by(rr_type) %>%
  tally() %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct, accuracy = 0.1)
  )

# Stacked bars: distribution of rr_type within each field.
# After summarise() the table is still grouped by field, so `pct` is the
# share within a field.
rr <- data %>%
  group_by(field, rr_type) %>%
  summarise(n = n()) %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct, accuracy = 0.1)
  ) %>%
  ggplot(aes(x = as.factor(field), y = pct, fill = as.factor(rr_type))) +
  geom_col(position = "stack", width = 0.8) +
  geom_text(
    aes(label = percent(pct, accuracy = 0.1)),
    family = "Times New Roman",
    size = 4,
    position = position_stack(vjust = .5)
  ) +
  scale_x_discrete(labels = labels) +
  scale_y_continuous(
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
    labels = percent_format(accuracy = 1)
  ) +
  scale_fill_manual(
    name = "",
    values = c("grey85", "grey75", "grey65", "grey55", "grey45"),
    labels = c(
      "No information",
      "Partial information – other rates",
      "Given, not defined",
      "Given, defined, but not AAPOR standard",
      "Given, AAPOR standard used"
    )
  ) +
  labs(x = NULL, y = NULL) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16, family = "Times New Roman")
  )
rr
ggsave("rr_fre.png", rr)

##rr time

# Share of each rr_type category per year, one facet per category.
# `pct` is computed within year; `lbl2` keeps the value label for years
# outside 2012-2020 only and is NA otherwise.
rr <- data %>%
  group_by(year, rr_type) %>%
  summarise(n = n()) %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct, accuracy = 0.1),
    lbl2 = if_else(year %in% 2012:2020, NA_character_, lbl)
  ) %>%
  ggplot(aes(x = as.factor(year), y = pct, group = rr_type)) +
  geom_line() +
  facet_wrap(~rr_type, nrow = 3, labeller = labels_rr) +
  geom_text(
    aes(label = lbl2),
    vjust = -0.5,
    size = 4,
    family = "Times New Roman"
  ) +
  scale_y_continuous(labels = percent, limits = c(0, 1)) +
  scale_x_discrete(
    breaks = c("2011", "2013", "2015", "2017", "2019", "2021"),
    labels = c("2011", "2013", "2015", "2017", "2019", "2021"),
    expand = c(0.1, 0.1)
  ) +
  labs(title = "", x = NULL, y = "", colour = NULL) +
  theme_minimal() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 10),
    text = element_text(size = 16, family = "Times New Roman")
  )
rr
ggsave("rr_time.png", rr, height = 7)


##RR prob vs non prob. only

# rr_type shares per year, split by the probability code (rows without a
# probability code are dropped). `pct` is the share within year x probability;
# combinations that never occur are added with 0%.
rr <- data %>%
  filter(!is.na(probability)) %>%
  group_by(year, probability, rr_type) %>%
  summarise(n = n()) %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct, accuracy = 0.1)
  ) %>%
  ungroup() %>%
  complete(year, probability, rr_type, fill = list(pct = 0, lbl = "0%")) %>%
  mutate(lbl2 = if_else(year %in% 2012:2020, NA_character_, lbl)) %>%
  ggplot(aes(x = as.factor(year), y = pct, group = rr_type)) +
  geom_line(aes(group = probability, linetype = as.factor(probability))) +
  facet_wrap(~rr_type, nrow = 3, labeller = labels_rr) +
  scale_linetype_manual(
    name = "",
    labels = c("Probability-based", "Non-probability-based"),
    values = c("solid", "dashed")
  ) +
  scale_y_continuous(labels = percent, limits = c(0, 1)) +
  scale_x_discrete(
    breaks = c("2011", "2013", "2015", "2017", "2019", "2021"),
    labels = c("2011", "2013", "2015", "2017", "2019", "2021"),
    expand = c(0.1, 0.1)
  ) +
  labs(title = "", x = NULL, y = "", colour = NULL) +
  theme_minimal() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 10),
    text = element_text(size = 16, family = "Times New Roman")
  )
rr
ggsave("rr_time_prob.png", rr, height = 7, width = 10)



##per field

# rr_type shares per year, one line per field inside each rr_type facet.
# `pct` is the share within year x field; combinations that never occur are
# added with 0%.
rr <- data %>%
  group_by(year, field, rr_type) %>%
  summarise(n = n()) %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct, accuracy = 0.1)
  ) %>%
  ungroup() %>%
  complete(year, field, rr_type, fill = list(pct = 0, lbl = "0%")) %>%
  mutate(lbl2 = if_else(year %in% 2012:2020, NA_character_, lbl)) %>%
  ggplot(aes(x = as.factor(year), y = pct, group = rr_type)) +
  geom_line(aes(group = field, linetype = as.factor(field))) +
  facet_wrap(~rr_type, nrow = 3, labeller = labels_rr) +
  scale_linetype_manual(
    name = "",
    labels = labels,
    values = c("solid", "dashed", "dotted")
  ) +
  scale_y_continuous(labels = percent, limits = c(0, 1)) +
  scale_x_discrete(
    breaks = c("2011", "2013", "2015", "2017", "2019", "2021"),
    labels = c("2011", "2013", "2015", "2017", "2019", "2021"),
    expand = c(0.1, 0.1)
  ) +
  labs(title = "", x = NULL, y = "", colour = NULL) +
  theme_minimal() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 10),
    text = element_text(size = 16, family = "Times New Roman")
  )
rr
ggsave("rr_time_field.png", rr, height = 7, width = 10)

# Inspection tables for field 3 ("Public opinion" in the field labeller):
# rr_type shares per year, first for all rows, then for probability == 1 only.
# View() needs an interactive session, so the tables are printed when the
# script runs in batch mode.
inspect_table <- if (interactive()) View else print

subset(data, field == 3) %>%
  group_by(year, rr_type) %>%
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = (n / sum(n)),
         lbl = scales::percent(pct, accuracy = 0.1)) %>%
  inspect_table()

subset(data, field == 3 & probability == 1) %>%
  group_by(year, rr_type) %>%
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = (n / sum(n)),
         lbl = scales::percent(pct, accuracy = 0.1)) %>%
  inspect_table()

# rr_type shares per year for rows with field == 3 and probability == 2,
# one facet per rr_type category. Displayed only, not saved.
rr <- data %>%
  filter(field == 3, probability == 2) %>%
  group_by(year, rr_type) %>%
  summarise(n = n()) %>%
  mutate(
    pct = n / sum(n),
    lbl = scales::percent(pct, accuracy = 0.1)
  ) %>%
  ggplot(aes(x = as.factor(year), y = pct, group = rr_type)) +
  geom_line() +
  facet_wrap(~rr_type, nrow = 3, labeller = labels_rr) +
  scale_y_continuous(labels = percent) +
  scale_x_discrete(breaks = c("2011", "2021"), labels = c("2011", "2021")) +
  labs(title = "", x = NULL, y = "", colour = NULL) +
  theme_minimal() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12, family = "Times New Roman")
  )
rr


# Share of probability == 1 rows that give a response rate (rr_type 3-5),
# per year, one facet per field. `pct` is the share within year x field.
# `year` is mapped to x as a number here, so the axis needs a continuous scale
# with numeric breaks; the discrete scale used before had string breaks that
# cannot match a numeric axis.
# NOTE(review): assumes `year` is numeric, as in `year %in% 2012:2020` and the
# scale_x_continuous() call of the per-year detail figure - confirm.
rr <- subset(data, probability == 1) %>%
  mutate(rr_bin = recode(rr_type, "1" = "0", "2" = "0", "3" = "1", "4" = "1", "5" = "1")) %>%
  group_by(year, field, rr_bin) %>%
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = (n / sum(n)),
         lbl = scales::percent(pct, accuracy = 0.1)) %>%
  filter(rr_bin == 1) %>%
  ggplot(aes(x = year, y = pct)) +
  geom_line() +
  facet_wrap(~field, labeller = labels) +
  scale_y_continuous(labels = percent) +
  scale_x_continuous(breaks = c(2011, 2021), labels = c("2011", "2021")) +
  labs(title = "", x = NULL, y = "", colour = NULL) +
  theme_minimal() +
  theme(
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12, family = "Times New Roman")
  )
rr

##mode

# Stacked bars: distribution of rr_type within each survey mode (rows without
# a mode code are dropped). After summarize() the table is still grouped by
# mode, so `pct` is the share within a mode.
rr <- subset(data, !is.na(mode)) %>%
  group_by(mode, rr_type) %>%
  summarize(n = n(), .groups = "drop_last") %>%
  mutate(pct = (n / sum(n)),
         lbl = scales::percent(pct, accuracy = 0.1)) %>%
  ggplot(aes(x = as.factor(mode), y = pct, fill = as.factor(rr_type))) +
  geom_bar(position = "stack", stat = "identity", width = 0.8) +
  geom_text(aes(label = percent(pct, accuracy = 0.1)),
            family = "Times New Roman", size = 4,
            position = position_stack(vjust = .5)) +
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
                     labels = percent_format(accuracy = 1),
                     expand = c(0, 0.15)) +
  scale_fill_manual(values = c("grey85", "grey75", "grey65", "grey55", "grey45"),
                    labels = c("No information",
                               "Partial information – other rates",
                               "Given, not defined",
                               "Given, defined,\nbut not AAPOR standard",
                               "Given, AAPOR standard used"),
                    name = "") +
  scale_x_discrete(labels = c("Postal", "F2F", "Telephone", "Online", "Mixed-mode")) +
  labs(x = NULL, y = NULL) +
  theme_bw() +
  # No trailing comma after the last theme element: an empty argument in
  # theme() is rejected by some ggplot2 versions.
  theme(panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5),
        text = element_text(size = 16, family = "Times New Roman"))
rr

ggsave("rr_fre_mode.png", rr)
